package cn.movie;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URL;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.junit.Test;

/**
 * Scrapes movie detail pages and appends one tab-separated record per movie to a result file.
 *
 * <p>Input is read from {@code D:/a/movie/movies.txt}; each line is split on tabs and field 0
 * is used as the type, field 2 as the title and field 5 as the detail-page URL. Output is
 * appended to {@code D:/a/movie/resultMovies.txt}.
 */
public class Demo_Detail {

	/** Placeholder written in the output when a short comment is not available. */
	private static final String NO_REMARK = "null";

	/** Index of the last input column that is read (the detail-page URL). */
	private static final int URL_COLUMN = 5;

	/**
	 * Reads the movie list, fetches every detail page and appends the extracted record to the
	 * result file.
	 *
	 * <p>Lines that do not contain at least six tab-separated fields (for example a trailing
	 * blank line) are reported on {@code System.err} and skipped instead of aborting the run.
	 *
	 * @throws Exception if the input or output file cannot be opened, or a detail page cannot
	 *                   be fetched or parsed; both files are closed before the exception
	 *                   propagates
	 */
	@Test
	public void test1() throws Exception {
		File file = new File("D:/a/movie/movies.txt");
		// NOTE(review): both streams use the platform default charset, exactly as before.
		// The output file is opened in append mode, so switching the encoding here would mix
		// encodings inside an existing file - confirm the file encoding before changing it.
		try (PrintStream ps = new PrintStream(new FileOutputStream("D:/a/movie/resultMovies.txt", true));
				BufferedReader buf = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
			String s;
			while ((s = buf.readLine()) != null) {
				String[] str = s.split("\\t");
				if (str.length <= URL_COLUMN) {
					// Blank or truncated line: there is no URL to fetch.
					System.err.println("skipping malformed line: " + s);
					continue;
				}
				String url = str[URL_COLUMN];
				String title = str[2];
				String type = str[0];
				System.out.println("tytle:"+title+"====type:"+type);
				// 30000 is the fetch timeout in milliseconds.
				Document dom = Jsoup.parse(new URL(url), 30000);
				ps.println(buildRecord(dom, type, title));
				// Progress marker: one line per processed movie.
				System.out.println("1");
			}
		}
	}

	/**
	 * Extracts the fields of interest from a parsed detail page and joins them into one
	 * tab-separated output line.
	 *
	 * <p>Column order: type, title, year, director, {@code [actors]}, release date, brief,
	 * comment-count digits, <code>{remark1||remark2}</code>, poster image URL.
	 *
	 * @param dom   parsed detail page
	 * @param type  value of input column 0, copied to the output unchanged
	 * @param title value of input column 2, copied to the output unchanged
	 * @return the output line, without a trailing line separator
	 */
	private static String buildRecord(Document dom, String type, String title) {
		// Poster image address; empty when the page has no matching <img>.
		Elements img = dom.select("#mainpic a[class='nbgnbg'] img");
		String urlImg = img.isEmpty() ? "" : img.get(0).attr("src");

		// Year: keep only the digits of the matched text.
		String startYear = dom.select("#content span[class='year']").text();
		startYear = startYear.replaceAll("[^0-9]", "");

		// Director(s) and starring actors.
		String maker = dom.select("#info span[class='attrs'] a[rel='v:directedBy']").text();
		String actor = dom.select("#info span[class='attrs'] a[rel='v:starring']").text();

		// Brief, with all whitespace removed so it cannot break the tab-separated layout.
		String brief = dom.select("#link-report span").text().replaceAll("\\s+", "");

		// Release date(s).
		String startDate = dom.select("#info span[ property='v:initialReleaseDate']").text();

		// Digits found in the comments-section label; presumably the number of comments.
		String evaluae = dom.select("#comments-section span[class='pl']").text();
		evaluae = evaluae.replaceAll("[^0-9]", "");

		// Take at most two short comments, and never more than the page actually contains,
		// even if the scraped count claims there are more.
		Elements shorts = dom.select("#comments-section span[class='short']");
		int available = Math.min(parseCount(evaluae), shorts.size());
		String remark1 = available >= 1 ? shorts.get(0).text() : NO_REMARK;
		String remark2 = available >= 2 ? shorts.get(1).text() : NO_REMARK;

		return type + "\t" + title + "\t" + startYear + "\t" + maker + "\t" + "[" + actor + "]" + "\t" + startDate
				+ "\t" + brief + "\t" + evaluae + "\t" + "{" + remark1 + "||" + remark2 + "}" + "\t" + urlImg;
	}

	/**
	 * Converts a string of ASCII digits into a count without throwing.
	 *
	 * @param digits string containing only the characters {@code 0-9}, possibly empty
	 * @return {@code 0} for an empty string, the parsed value otherwise, or
	 *         {@link Integer#MAX_VALUE} when the digits do not fit into an {@code int}
	 */
	private static int parseCount(String digits) {
		if (digits.isEmpty()) {
			return 0;
		}
		try {
			return Integer.parseInt(digits);
		} catch (NumberFormatException overflow) {
			// Only digits are present, so the sole failure mode is overflow: "very many".
			return Integer.MAX_VALUE;
		}
	}
}
